cd "S:\Katja_Aksel\JFQA index paper\v3\pseudo_data"
clear
set more 1
cap log close

log using ./logs/2_valuations.log, replace

***calculate valuations
use ./2d_all_purchases.dta, clear
keep orgnr pdate erverv_type shares
*gen number of shares in primary transactions
gen help1=shares if erverv_type!="K"
replace help1=0 if help1==.
sort orgnr pdate
*number of shares per date
bys orgnr pdate: egen help2=sum(help1)
keep orgnr pdate help2
duplicates drop
gen pyear=year(pdate)
gen pmonth=month(pdate)
egen time=group(pyear pmonth)
*number of shares per month
bys orgnr time: egen help3=sum(help2)
keep orgnr time pyear help3
duplicates drop
sort orgnr time
bys orgnr pyear: gen first=1 if _n==1
**generate the number of cumulative share per month
gen cumshares=help3 if first==1
bys orgnr: replace cumshares=cumshares[_n-1]+help3 if first==.
keep orgnr time cumshares
save ./helpfile_shares_per_month.dta, replace

****
use ./2d_all_purchases.dta, clear
merge m:1 orgnr using ./1k_population_clean.dta, keepusing(final exityear sample vcbacked founded industry)
*1,577 firms are not in the purchase data anymore
keep if _merge==3
drop _merge

gen pyear=year(pdate)
gen pmonth=month(pdate)	
*generate financing rounds unique month-year observations
egen time=group(pyear pmonth)

*cleaning up the data by dropping tiny investments
drop if amount<10000
*111,814 obs dropped
drop if shares<2
*5,645 dropped
drop if (amount/shares)<1
*10,639 dropped
sort orgnr pyear erverv_type

*keep only transactions with substantial ownership and where other investors would have had a chance to sell
drop if ownership<0.01
*77,244 obs dropped
drop if ownership>0.99
*70,772 obs dropped
*291,732 observations

**exclude the regulatory cost to establish the company or any other founder's initial equity 
**check how much equity was injected at the inception (erverv_type=T)
bys orgnr erverv_type: egen amount1=sum(amount)

**identify initial equity
gen init=amount if erverv_type=="T" & investortype==1
*54,994 transactions by entrepreneurs are excluded
replace init=amount if erverv_type=="T" & amount1<=30000 & pyear>=2012
*16,699 transactions by other investors are excluded
replace init=amount if erverv_type=="T" & amount1<=100000 & pyear<2012
*4,603 transactions by other investors are excluded
replace init=0 if init==.
bys orgnr: egen initial=sum(init)

**exclude regulatory cost
replace amount=. if init!=0
*76,296 changes (transactions excluded)
replace shares=. if amount==.

gen pamount=amount if erverv_type!="K"
gen pshares=shares if erverv_type!="K"
gen kamount=amount if erverv_type=="K"
gen kshares=shares if erverv_type=="K"
foreach t in pamount pshares kamount kshares {
						replace `t'=0 if `t'==.
						}

order orgnr pdate pmonth pyear time		
sort orgnr time	
bys orgnr time: egen primamount=sum(pamount)
bys orgnr time: egen primshares=sum(pshares)	
bys orgnr time: egen secamount=sum(kamount)
bys orgnr time: egen secshares=sum(kshares)	
keep orgnr pmonth pyear time prim* sec* vcbacked sample final exityear founded industry initial
duplicates drop
*128,907 observations

drop if primamount==0 & secamount==0
*22,857 observations dropped; 106,050 observations

gen totshares=primshares+secshares
gen totamount=primamount+secamount

foreach t in primamount primshares secamount secshares {
						replace `t'=. if `t'==0
						}

gen pprice=totamount/totshares
gen primaryprice=primamount/primshares
gen secprice=secamount/secshares
merge m:1 orgnr pyear using ./4_shares_outstanding.dta, keepusing(firmshares0101)
keep if _merge==3
drop _merge
**all merged in
merge 1:1 orgnr time using ./helpfile_shares_per_month.dta
drop if _merge==2
drop _merge

gen mv_rounds=primaryprice*(firmshares0101+cumshares)
gen mv_sec=secprice*(firmshares0101+cumshares)
gen mv_all=pprice*(firmshares0101+cumshares)

rename pyear aar
order orgnr time aar mv* 
sort orgnr time
tab sample
/*
      sample |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |     53,842       50.77       50.77
          1 |     52,208       49.23      100.00
------------+-----------------------------------
      Total |    106,050      100.00

*/
drop if aar==2003
*241 observations dropped
order orgnr time pmonth aar
drop vcbacked founded sample exityear final industry
save ./4_postmoney_valuations.dta, replace
**export out of secured environment

****Add failed firms that in the end have a valuation of 0
use ./1k_population_clean.dta, clear
keep orgnr final exityear
keep if final>1 & final<6
*41,846 firms
rename exityear aar
tab aar
/*

        aar |      Freq.     Percent        Cum.
------------+-----------------------------------
       2004 |          2        0.00        0.00
       2005 |        107        0.26        0.26
       2006 |        453        1.08        1.34
       2007 |        805        1.92        3.27
       2008 |      1,413        3.38        6.64
       2009 |      1,985        4.74       11.39
       2010 |      2,030        4.85       16.24
       2011 |      2,085        4.98       21.22
       2012 |      1,966        4.70       25.92
       2013 |      2,681        6.41       32.33
       2014 |      3,167        7.57       39.89
       2015 |      3,491        8.34       48.24
       2016 |      4,059        9.70       57.94
       2017 |      5,086       12.15       70.09
       2018 |     12,516       29.91      100.00
------------+-----------------------------------
      Total |     41,846      100.00
*/

gen pmonth=12
gen time=192 if aar==2018
replace time=180 if aar==2017
replace time=168 if aar==2016
replace time=156 if aar==2015
replace time=144 if aar==2014
replace time=132 if aar==2013
replace time=120 if aar==2012
replace time=108 if aar==2011
replace time=96 if aar==2010
replace time=84 if aar==2009
replace time=72 if aar==2008
replace time=60 if aar==2007
replace time=48 if aar==2006
replace time=36 if aar==2005
replace time=24 if aar==2004

keep orgnr time pmonth aar 
merge 1:m orgnr time pmonth aar using ./4_postmoney_valuations.dta
*119 firms have some secondary valuations in the same month, replace them by zero
foreach t in mv_rounds mv_all primamount primshares totshares totamount {
replace `t'=0 if _merge!=2	
}
drop _merge
sort orgnr time
merge m:1 orgnr using ./1k_population_clean.dta
drop if _merge==2
drop _merge
compress
save ./5_postmoney_valuations_failures.dta, replace
*export from secured environment

***********************
****Purchases by investor type
use ./2d_all_purchases.dta, clear
merge m:1 orgnr using ./1k_population_clean.dta, keepusing(final exityear sample vcbacked founded industry)
*1,577 firms are not in the purchase data anymore
keep if _merge==3
drop _merge

gen pyear=year(pdate)
gen pmonth=month(pdate)	
*generate financing rounds unique month-year observations
egen time=group(pyear pmonth)

*cleaning up the data by dropping tiny investments
drop if amount<10000
*111,814 obs dropped
drop if shares<2
*5,645 dropped
drop if (amount/shares)<1
*10,639 dropped
sort orgnr pyear erverv_type

*keep only transactions with substantial ownership and where other investors would have had a chance to sell
drop if ownership<0.01
*77,244 obs dropped
drop if ownership>0.99
*70,772 obs dropped
*291,732 observations

**exclude the regulatory cost to establish the company or any other founder's initial equity 
**check how much equity was injected at the inception (erverv_type=T)
bys orgnr erverv_type: egen amount1=sum(amount)

**identify initial equity
gen init=amount if erverv_type=="T" & investortype==1
*54,994 transactions by entrepreneurs are excluded
replace init=amount if erverv_type=="T" & amount1<=30000 & pyear>=2012
*16,699 transactions by other investors are excluded
replace init=amount if erverv_type=="T" & amount1<=100000 & pyear<2012
*4,603 transactions by other investors are excluded
replace init=0 if init==.
bys orgnr: egen initial=sum(init)

**exclude regulatory cost
replace amount=. if init!=0
*76,296 changes (transactions excluded)
replace shares=. if amount==.

bys orgnr time investortype: egen typeinvestors=count(investor_id)
gen pamount=amount if erverv_type!="K"
gen pshares=shares if erverv_type!="K"
gen kamount=amount if erverv_type=="K"
gen kshares=shares if erverv_type=="K"
gen pownership=ownership if erverv_type!="K"
gen kownership=ownership if erverv_type=="K"
foreach t in pamount pshares kamount kshares pownership kownership {
						replace `t'=0 if `t'==.
						}

order orgnr pdate pmonth pyear time	investortype	
sort orgnr time	
bys orgnr time investortype: egen primamount=sum(pamount)
bys orgnr time investortype: egen primshares=sum(pshares)
bys orgnr time investortype: egen primownersh=sum(pownership)

bys orgnr time investortype: egen secamount=sum(kamount)
bys orgnr time investortype: egen secshares=sum(kshares)
bys orgnr time investortype: egen secownersh=sum(kownership)
	
keep orgnr pmonth pyear time investortype prim* sec* typeinvestors
duplicates drop
*209,511 observations

drop if pyear==2003
*511 observations dropped
order orgnr time pmonth pyear investortype
sort orgnr time pmonth pyear investortype
tab investortype
/*
1entreprene |
 ur 2family |
3repeatange |
 l 4ind 5VC |
 6corp 7fin |
     8other |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |     75,061       35.91       35.91
          2 |      5,510        2.64       38.55
          3 |     14,234        6.81       45.36
          4 |     64,962       31.08       76.44
          5 |      2,060        0.99       77.43
          6 |     39,859       19.07       96.50
          7 |        202        0.10       96.60
          8 |      7,112        3.40      100.00
------------+-----------------------------------
      Total |    209,000      100.00


*/

save ./6_investors.dta, replace
*export from secured environment


*****CALCULATE FIRM-LEVEL RETURNS
use ./5_postmoney_valuations_failures.dta, clear
**new variable cumulative equity raised
sort orgnr time
bys orgnr: gen lauf=_n
replace primamount=0 if primamount==.
**start with the initial legally required equity+founder's initial equity as the first obseration
gen cumequity=initial if lauf==1
replace cumequity=100000 if lauf==1 & founded<2012 & (cumequity==. | cumequity<100000)
replace cumequity=30000 if lauf==1 & founded>=2012 & (cumequity==. | cumequity<30000)
*add equity additionally raised in the first observation
replace cumequity=cumequity+primamount if lauf==1
*roll over
bys orgnr: replace cumequity=cumequity[_n-1]+primamount if lauf>1
drop lauf
sum cumequity, d
/*
                          cumequity
-------------------------------------------------------------
      Percentiles      Smallest
 1%        30000          30000
 5%        30000          30000
10%        30000          30000       Obs             147,536
25%       100000          30000       Sum of wgt.     147,536

50%       173700                      Mean            9891653
                        Largest       Std. dev.      3.02e+08
75%       575000       2.71e+10
90%      3114180       3.71e+10       Variance       9.15e+16
95%     1.14e+07       5.13e+10       Skewness       142.0735
99%     1.39e+08       6.90e+10       Kurtosis       26436.07

*/
**now keep only the real latest financing round 
keep if mv_rounds!=. & mv_rounds!=0
sort orgnr time
bys orgnr: keep if _n==_N
**TVPI based on the latest observable financing round
gen TVPI1=mv_rounds/cumequity 
sum TVPI1, d
/*
                            TVPI1
-------------------------------------------------------------
      Percentiles      Smallest
 1%      .437009       .0025073
 5%     .5527638       .0030021
10%     .6346151       .0037588       Obs              45,706
25%     .7179487       .0041273       Sum of wgt.      45,706

50%     .9756098                      Mean           2.316159
                        Largest       Std. dev.      214.6428
75%            1       74.50426
90%     1.694915       77.21767       Variance       46071.53
95%     3.055621       90.13834       Skewness       213.7363
99%     9.819447        45886.4       Kurtosis        45690.8

*/
keep orgnr cumequity TVPI1
save ./TVPI1.dta, replace

***now the latest valuation based on real and implicit (zero) financing rounds, also of failing firms
use ./5_postmoney_valuations_failures.dta, clear
sort orgnr time
**new variable cumulative equity raised
bys orgnr: gen lauf=_n
replace primamount=0 if primamount==.
**start with the initial legally required equity+founder's initial equity as the first obseration
gen cumequity=initial if lauf==1
replace cumequity=100000 if lauf==1 & founded<2012 & (cumequity==. | cumequity<100000)
replace cumequity=30000 if lauf==1 & founded>=2012 & (cumequity==. | cumequity<30000)
*add equity additionally raised in the first observation
replace cumequity=cumequity+primamount if lauf==1
*roll over
bys orgnr: replace cumequity=cumequity[_n-1]+primamount if lauf>1
drop lauf
**now keep the latest financing round (also zeros) 
keep if mv_rounds!=. 
sort orgnr time
bys orgnr: keep if _n==_N
**TVPI based on the latest observable financing round
gen TVPI2=mv_rounds/cumequity 
sum TVPI2, d
/*
                            TVPI2
-------------------------------------------------------------
      Percentiles      Smallest
 1%            0              0
 5%            0              0
10%            0              0       Obs              71,559
25%            0              0       Sum of wgt.      71,559

50%            0                      Mean           1.213036
                        Largest       Std. dev.       171.542
75%     .9113924       66.74348
90%            1       67.49873       Variance       29426.65
95%      1.59524       74.23264       Skewness       267.4553
99%     5.942464        45886.4       Kurtosis       71541.23

*/
rename cumequity cumequity2
keep orgnr cumequity2 TVPI2
save ./TVPI2.dta, replace

****now keep all valuations, financing rounds, secondary and implicit failing
use ./5_postmoney_valuations_failures.dta, clear
sort orgnr time
**new variable cumulative equity raised
bys orgnr: gen lauf=_n
replace primamount=0 if primamount==.
**start with the initial legally required equity+founder's initial equity as the first obseration
gen cumequity=initial if lauf==1
replace cumequity=100000 if lauf==1 & founded<2012 & (cumequity==. | cumequity<100000)
replace cumequity=30000 if lauf==1 & founded>=2012 & (cumequity==. | cumequity<30000)
*add equity additionally raised in the first observation
replace cumequity=cumequity+primamount if lauf==1
*roll over
bys orgnr: replace cumequity=cumequity[_n-1]+primamount if lauf>1
drop lauf
sum cumequity, d
**now keep only the latest financing round 
sort orgnr time
bys orgnr: keep if _n==_N
gen TVPI3=mv_all/cumequity 
sum TVPI3, d
/*
                            TVPI3
-------------------------------------------------------------
      Percentiles      Smallest
 1%            0              0
 5%            0              0
10%            0              0       Obs              78,111
25%            0              0       Sum of wgt.      78,111

50%            0                      Mean           63.27719
                        Largest       Std. dev.      15702.58
75%            1       38409.84
90%          2.1        45886.4       Variance       2.47e+08
95%     6.451613       70841.08       Skewness       279.2662
99%     55.55556        4387500       Kurtosis       78029.43

*/
rename cumequity cumequity3
keep orgnr cumequity3 TVPI3
merge 1:1 orgnr using ./TVPI1.dta
drop _merge
merge 1:1 orgnr using ./TVPI2.dta
drop _merge
merge m:1 orgnr using ./1k_population_clean.dta
drop _merge
order orgnr TVPI1 TVPI2 TVPI3 cumequity*
sum TVPI*
/*

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
       TVPI1 |     45,706    2.316159    214.6428   .0025073    45886.4
       TVPI2 |     71,559    1.213036     171.542          0    45886.4
       TVPI3 |     78,111    63.27719    15702.58          0    4387500
*/
save ./7_population_firmTVPI.dta, replace
*export from secured environment



